{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Relational Topic Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "import logging\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from ptm import RelationalTopicModel\n",
    "from ptm.utils import convert_cnt_to_list, get_top_words\n",
    "\n",
    "logger = logging.getLogger('RelationalTopicModel')\n",
    "logger.propagate=False\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load CORA dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "doc_ids = pickle.load(open('../data/cora/doc_ids.pkl', 'rb'))\n",
    "doc_cnt = pickle.load(open('../data/cora/doc_cnt.pkl', 'rb'))\n",
    "doc_links = pickle.load(open('../data/cora/doc_links_sym.pkl', 'rb'))\n",
    "voca = pickle.load(open('../data/cora/voca.pkl', 'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "n_doc = len(doc_ids)\n",
    "n_topic = 10\n",
    "n_voca = len(voca)\n",
    "max_iter = 50"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2016-02-18 09:53:07 INFO:RelationalTopicModel:Initialize RTM: num_voca:17059, num_topic:10, num_doc:13147\n",
      "2016-02-18 09:53:14 INFO:RelationalTopicModel:[ITER]   0,\tElapsed time: 5.057\tELBO: -7558267.416\n",
      "2016-02-18 09:53:19 INFO:RelationalTopicModel:[ITER]   1,\tElapsed time: 5.066\tELBO: -7555930.907\n",
      "2016-02-18 09:53:24 INFO:RelationalTopicModel:[ITER]   2,\tElapsed time: 5.277\tELBO: -7553953.242\n",
      "2016-02-18 09:53:30 INFO:RelationalTopicModel:[ITER]   3,\tElapsed time: 5.819\tELBO: -7551585.770\n",
      "2016-02-18 09:53:35 INFO:RelationalTopicModel:[ITER]   4,\tElapsed time: 5.233\tELBO: -7547984.223\n",
      "2016-02-18 09:53:41 INFO:RelationalTopicModel:[ITER]   5,\tElapsed time: 5.650\tELBO: -7542205.711\n",
      "2016-02-18 09:53:46 INFO:RelationalTopicModel:[ITER]   6,\tElapsed time: 5.062\tELBO: -7533498.895\n",
      "2016-02-18 09:53:52 INFO:RelationalTopicModel:[ITER]   7,\tElapsed time: 5.678\tELBO: -7521541.780\n",
      "2016-02-18 09:53:57 INFO:RelationalTopicModel:[ITER]   8,\tElapsed time: 5.574\tELBO: -7506539.192\n",
      "2016-02-18 09:54:03 INFO:RelationalTopicModel:[ITER]   9,\tElapsed time: 5.747\tELBO: -7489166.663\n",
      "2016-02-18 09:54:08 INFO:RelationalTopicModel:[ITER]  10,\tElapsed time: 5.443\tELBO: -7470531.973\n",
      "2016-02-18 09:54:14 INFO:RelationalTopicModel:[ITER]  11,\tElapsed time: 5.647\tELBO: -7452060.141\n",
      "2016-02-18 09:54:20 INFO:RelationalTopicModel:[ITER]  12,\tElapsed time: 5.465\tELBO: -7434975.754\n",
      "2016-02-18 09:54:25 INFO:RelationalTopicModel:[ITER]  13,\tElapsed time: 5.545\tELBO: -7419959.908\n",
      "2016-02-18 09:54:30 INFO:RelationalTopicModel:[ITER]  14,\tElapsed time: 5.240\tELBO: -7407361.898\n",
      "2016-02-18 09:54:36 INFO:RelationalTopicModel:[ITER]  15,\tElapsed time: 5.247\tELBO: -7397150.490\n",
      "2016-02-18 09:54:41 INFO:RelationalTopicModel:[ITER]  16,\tElapsed time: 5.160\tELBO: -7389101.911\n",
      "2016-02-18 09:54:46 INFO:RelationalTopicModel:[ITER]  17,\tElapsed time: 5.285\tELBO: -7382819.209\n",
      "2016-02-18 09:54:51 INFO:RelationalTopicModel:[ITER]  18,\tElapsed time: 5.447\tELBO: -7377980.009\n",
      "2016-02-18 09:54:57 INFO:RelationalTopicModel:[ITER]  19,\tElapsed time: 5.430\tELBO: -7374338.836\n",
      "2016-02-18 09:55:03 INFO:RelationalTopicModel:[ITER]  20,\tElapsed time: 5.659\tELBO: -7371643.396\n",
      "2016-02-18 09:55:08 INFO:RelationalTopicModel:[ITER]  21,\tElapsed time: 5.646\tELBO: -7369688.903\n",
      "2016-02-18 09:55:13 INFO:RelationalTopicModel:[ITER]  22,\tElapsed time: 4.985\tELBO: -7368338.195\n",
      "2016-02-18 09:55:19 INFO:RelationalTopicModel:[ITER]  23,\tElapsed time: 5.319\tELBO: -7367446.391\n",
      "2016-02-18 09:55:24 INFO:RelationalTopicModel:[ITER]  24,\tElapsed time: 5.260\tELBO: -7366919.001\n",
      "2016-02-18 09:55:29 INFO:RelationalTopicModel:[ITER]  25,\tElapsed time: 5.182\tELBO: -7366663.674\n",
      "2016-02-18 09:55:34 INFO:RelationalTopicModel:[ITER]  26,\tElapsed time: 5.156\tELBO: -7366624.061\n",
      "2016-02-18 09:55:40 INFO:RelationalTopicModel:[ITER]  27,\tElapsed time: 5.892\tELBO: -7366759.507\n",
      "2016-02-18 09:55:45 INFO:RelationalTopicModel:[ITER]  28,\tElapsed time: 5.379\tELBO: -7367038.713\n",
      "2016-02-18 09:55:51 INFO:RelationalTopicModel:[ITER]  29,\tElapsed time: 5.349\tELBO: -7367430.856\n",
      "2016-02-18 09:55:56 INFO:RelationalTopicModel:[ITER]  30,\tElapsed time: 5.617\tELBO: -7367911.771\n",
      "2016-02-18 09:56:02 INFO:RelationalTopicModel:[ITER]  31,\tElapsed time: 5.716\tELBO: -7368443.710\n",
      "2016-02-18 09:56:08 INFO:RelationalTopicModel:[ITER]  32,\tElapsed time: 5.658\tELBO: -7368992.404\n",
      "2016-02-18 09:56:13 INFO:RelationalTopicModel:[ITER]  33,\tElapsed time: 5.613\tELBO: -7369555.693\n",
      "2016-02-18 09:56:19 INFO:RelationalTopicModel:[ITER]  34,\tElapsed time: 5.816\tELBO: -7370132.692\n",
      "2016-02-18 09:56:25 INFO:RelationalTopicModel:[ITER]  35,\tElapsed time: 5.670\tELBO: -7370722.740\n",
      "2016-02-18 09:56:30 INFO:RelationalTopicModel:[ITER]  36,\tElapsed time: 5.491\tELBO: -7371328.853\n",
      "2016-02-18 09:56:36 INFO:RelationalTopicModel:[ITER]  37,\tElapsed time: 5.453\tELBO: -7371934.003\n",
      "2016-02-18 09:56:41 INFO:RelationalTopicModel:[ITER]  38,\tElapsed time: 5.514\tELBO: -7372531.831\n",
      "2016-02-18 09:56:47 INFO:RelationalTopicModel:[ITER]  39,\tElapsed time: 5.513\tELBO: -7373139.290\n",
      "2016-02-18 09:56:52 INFO:RelationalTopicModel:[ITER]  40,\tElapsed time: 5.691\tELBO: -7373752.452\n",
      "2016-02-18 09:56:58 INFO:RelationalTopicModel:[ITER]  41,\tElapsed time: 5.561\tELBO: -7374371.438\n",
      "2016-02-18 09:57:04 INFO:RelationalTopicModel:[ITER]  42,\tElapsed time: 5.556\tELBO: -7374985.691\n",
      "2016-02-18 09:57:09 INFO:RelationalTopicModel:[ITER]  43,\tElapsed time: 5.587\tELBO: -7375587.696\n",
      "2016-02-18 09:57:15 INFO:RelationalTopicModel:[ITER]  44,\tElapsed time: 5.522\tELBO: -7376178.072\n",
      "2016-02-18 09:57:20 INFO:RelationalTopicModel:[ITER]  45,\tElapsed time: 5.597\tELBO: -7376748.420\n",
      "2016-02-18 09:57:26 INFO:RelationalTopicModel:[ITER]  46,\tElapsed time: 5.610\tELBO: -7377299.373\n",
      "2016-02-18 09:57:31 INFO:RelationalTopicModel:[ITER]  47,\tElapsed time: 5.431\tELBO: -7377820.927\n",
      "2016-02-18 09:57:37 INFO:RelationalTopicModel:[ITER]  48,\tElapsed time: 5.701\tELBO: -7378326.535\n",
      "2016-02-18 09:57:43 INFO:RelationalTopicModel:[ITER]  49,\tElapsed time: 5.459\tELBO: -7378829.569\n"
     ]
    }
   ],
   "source": [
    "model = RelationalTopicModel(n_topic, n_doc, n_voca, verbose=True)\n",
    "model.fit(doc_ids, doc_cnt, doc_links, max_iter=max_iter)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Print the highest-probability words for each topic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Topic 0 : program,data,language,programming,parallel,analysis,code,paper,compiler,optimization\n",
      "Topic 1 : network,service,protocol,performance,paper,routing,communication,application,traffic,control\n",
      "Topic 2 : system,object,data,database,file,information,distributed,server,access,user\n",
      "Topic 3 : algorithm,problem,time,show,bound,result,number,graph,solution,paper\n",
      "Topic 4 : model,algorithm,method,network,function,learning,problem,result,neural,set\n",
      "Topic 5 : agent,research,science,system,part,grant,paper,work,supported,computer\n",
      "Topic 6 : system,query,planning,language,rule,paper,information,approach,problem,plan\n",
      "Topic 7 : system,design,software,paper,model,application,specification,component,protocol,user\n",
      "Topic 8 : learning,model,system,image,data,algorithm,approach,using,task,method\n",
      "Topic 9 : system,performance,memory,application,parallel,communication,processor,cache,paper,architecture\n"
     ]
    }
   ],
   "source": [
    "for k in range(n_topic):\n",
    "    top_words = get_top_words(model.beta, voca, k, 10)\n",
    "    print('Topic', k, ':', ','.join(top_words))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
